This dataset contains statistics of various crimes cases against women from the year 2001-2014 with State or Union Territories.
import numpy as np ##for Mathematical and logical operations
import pandas as pd ##to load,organize,manipulate,model and analyze the data
import matplotlib.pyplot as plt ##for plotting graphs
import seaborn as sns ##to visualize random distributions
import warnings
warnings.filterwarnings('ignore')
## Getting the dataset
data = pd.read_csv('crimes_against_women_2001-2014.csv')
data.head()
| Unnamed: 0 | STATE/UT | DISTRICT | Year | Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | ANDHRA PRADESH | ADILABAD | 2001 | 50 | 30 | 16 | 149 | 34 | 175 | 0 |
| 1 | 1 | ANDHRA PRADESH | ANANTAPUR | 2001 | 23 | 30 | 7 | 118 | 24 | 154 | 0 |
| 2 | 2 | ANDHRA PRADESH | CHITTOOR | 2001 | 27 | 34 | 14 | 112 | 83 | 186 | 0 |
| 3 | 3 | ANDHRA PRADESH | CUDDAPAH | 2001 | 20 | 20 | 17 | 126 | 38 | 57 | 0 |
| 4 | 4 | ANDHRA PRADESH | EAST GODAVARI | 2001 | 23 | 26 | 12 | 109 | 58 | 247 | 0 |
## Getting first 5 rows
data.head(5)
| Unnamed: 0 | STATE/UT | DISTRICT | Year | Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | ANDHRA PRADESH | ADILABAD | 2001 | 50 | 30 | 16 | 149 | 34 | 175 | 0 |
| 1 | 1 | ANDHRA PRADESH | ANANTAPUR | 2001 | 23 | 30 | 7 | 118 | 24 | 154 | 0 |
| 2 | 2 | ANDHRA PRADESH | CHITTOOR | 2001 | 27 | 34 | 14 | 112 | 83 | 186 | 0 |
| 3 | 3 | ANDHRA PRADESH | CUDDAPAH | 2001 | 20 | 20 | 17 | 126 | 38 | 57 | 0 |
| 4 | 4 | ANDHRA PRADESH | EAST GODAVARI | 2001 | 23 | 26 | 12 | 109 | 58 | 247 | 0 |
## Getting last 5 rows
data.tail(5)
| Unnamed: 0 | STATE/UT | DISTRICT | Year | Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 10672 | 832 | Lakshadweep | Lakshadweep | 2014 | 1 | 0 | 0 | 1 | 2 | 0 | 0 |
| 10673 | 833 | Lakshadweep | Total District(s) | 2014 | 1 | 0 | 0 | 1 | 2 | 0 | 0 |
| 10674 | 834 | Puducherry | Karaikal | 2014 | 3 | 1 | 0 | 12 | 1 | 1 | 0 |
| 10675 | 835 | Puducherry | Puducherry | 2014 | 7 | 6 | 1 | 20 | 7 | 3 | 0 |
| 10676 | 836 | Puducherry | Total District(s) | 2014 | 10 | 7 | 1 | 32 | 8 | 4 | 0 |
## Check the rows and columns in the dataset
data.shape
(10677, 11)
## To check the total size of the data(rows*column = size)
data.size
117447
## To get all the columns present in the dataset
data.columns.to_list()
['Unnamed: 0', 'STATE/UT', 'DISTRICT', 'Year', 'Rape', 'Kidnapping and Abduction', 'Dowry Deaths', 'Assault on women with intent to outrage her modesty', 'Insult to modesty of Women', 'Cruelty by Husband or his Relatives', 'Importation of Girls']
## TO get the summary on the dataframe including dtype,shape and memory storage
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 10677 entries, 0 to 10676 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 10677 non-null int64 1 STATE/UT 10677 non-null object 2 DISTRICT 10677 non-null object 3 Year 10677 non-null int64 4 Rape 10677 non-null int64 5 Kidnapping and Abduction 10677 non-null int64 6 Dowry Deaths 10677 non-null int64 7 Assault on women with intent to outrage her modesty 10677 non-null int64 8 Insult to modesty of Women 10677 non-null int64 9 Cruelty by Husband or his Relatives 10677 non-null int64 10 Importation of Girls 10677 non-null int64 dtypes: int64(9), object(2) memory usage: 917.7+ KB
## To get the summary statistics of all the numerical columns present in the data
data.describe()
| Unnamed: 0 | Year | Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | |
|---|---|---|---|---|---|---|---|---|---|
| count | 10677.000000 | 10677.000000 | 10677.000000 | 10677.000000 | 10677.000000 | 10677.000000 | 10677.000000 | 10677.000000 | 10677.000000 |
| mean | 3871.570198 | 2007.697949 | 57.989885 | 69.888358 | 20.181699 | 113.539196 | 27.419313 | 209.224314 | 0.175330 |
| std | 2816.369918 | 4.046874 | 214.230398 | 311.623450 | 98.276531 | 458.903951 | 167.806797 | 905.664362 | 2.228637 |
| min | 0.000000 | 2001.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 1009.000000 | 2004.000000 | 8.000000 | 6.000000 | 1.000000 | 10.000000 | 0.000000 | 11.000000 | 0.000000 |
| 50% | 3678.000000 | 2008.000000 | 22.000000 | 20.000000 | 5.000000 | 34.000000 | 2.000000 | 50.000000 | 0.000000 |
| 75% | 6347.000000 | 2011.000000 | 44.000000 | 49.000000 | 16.000000 | 85.000000 | 12.000000 | 144.000000 | 0.000000 |
| max | 9016.000000 | 2014.000000 | 5076.000000 | 10626.000000 | 2469.000000 | 10001.000000 | 4970.000000 | 23278.000000 | 83.000000 |
#removing the unnamed column
data.drop('Unnamed: 0',axis=1,inplace=True)
data
| STATE/UT | DISTRICT | Year | Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ANDHRA PRADESH | ADILABAD | 2001 | 50 | 30 | 16 | 149 | 34 | 175 | 0 |
| 1 | ANDHRA PRADESH | ANANTAPUR | 2001 | 23 | 30 | 7 | 118 | 24 | 154 | 0 |
| 2 | ANDHRA PRADESH | CHITTOOR | 2001 | 27 | 34 | 14 | 112 | 83 | 186 | 0 |
| 3 | ANDHRA PRADESH | CUDDAPAH | 2001 | 20 | 20 | 17 | 126 | 38 | 57 | 0 |
| 4 | ANDHRA PRADESH | EAST GODAVARI | 2001 | 23 | 26 | 12 | 109 | 58 | 247 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10672 | Lakshadweep | Lakshadweep | 2014 | 1 | 0 | 0 | 1 | 2 | 0 | 0 |
| 10673 | Lakshadweep | Total District(s) | 2014 | 1 | 0 | 0 | 1 | 2 | 0 | 0 |
| 10674 | Puducherry | Karaikal | 2014 | 3 | 1 | 0 | 12 | 1 | 1 | 0 |
| 10675 | Puducherry | Puducherry | 2014 | 7 | 6 | 1 | 20 | 7 | 3 | 0 |
| 10676 | Puducherry | Total District(s) | 2014 | 10 | 7 | 1 | 32 | 8 | 4 | 0 |
10677 rows × 10 columns
## To check null values present in the dataset
data.isnull().sum()
STATE/UT 0 DISTRICT 0 Year 0 Rape 0 Kidnapping and Abduction 0 Dowry Deaths 0 Assault on women with intent to outrage her modesty 0 Insult to modesty of Women 0 Cruelty by Husband or his Relatives 0 Importation of Girls 0 dtype: int64
#to see the unique states and union territories
data['STATE/UT'].unique()
array(['ANDHRA PRADESH', 'ARUNACHAL PRADESH', 'ASSAM', 'BIHAR',
'CHHATTISGARH', 'GOA', 'GUJARAT', 'HARYANA', 'HIMACHAL PRADESH',
'JAMMU & KASHMIR', 'JHARKHAND', 'KARNATAKA', 'KERALA',
'MADHYA PRADESH', 'MAHARASHTRA', 'MANIPUR', 'MEGHALAYA', 'MIZORAM',
'NAGALAND', 'ODISHA', 'PUNJAB', 'RAJASTHAN', 'SIKKIM',
'TAMIL NADU', 'TRIPURA', 'UTTAR PRADESH', 'UTTARAKHAND',
'WEST BENGAL', 'A & N ISLANDS', 'CHANDIGARH', 'D & N HAVELI',
'DAMAN & DIU', 'DELHI', 'LAKSHADWEEP', 'PUDUCHERRY',
'Andhra Pradesh', 'Arunachal Pradesh', 'Assam', 'Bihar',
'Chhattisgarh', 'Goa', 'Gujarat', 'Haryana', 'Himachal Pradesh',
'Jammu & Kashmir', 'Jharkhand', 'Karnataka', 'Kerala',
'Madhya Pradesh', 'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram',
'Nagaland', 'Odisha', 'Punjab', 'Rajasthan', 'Sikkim',
'Tamil Nadu', 'Tripura', 'Uttar Pradesh', 'Uttarakhand',
'West Bengal', 'A&N Islands', 'Chandigarh', 'D&N Haveli',
'Daman & Diu', 'Delhi UT', 'Lakshadweep', 'Puducherry',
'Telangana', 'A & N Islands'], dtype=object)
data['DISTRICT'].unique()
array(['ADILABAD', 'ANANTAPUR', 'CHITTOOR', ..., 'Lakshadweep',
'Karaikal', 'Puducherry'], dtype=object)
data['Year'].unique()
array([2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
2012, 2013, 2014], dtype=int64)
#since the states are repeated converting all states to upper and comibing the same states
data.loc[data['STATE/UT'] == 'A&N Islands', 'STATE/UT'] = 'A & N ISLANDS'
data.loc[data['STATE/UT'] == 'D&N Haveli', 'STATE/UT'] = 'D & N HAVELI'
data.loc[data['STATE/UT'] == 'Delhi UT', 'STATE/UT'] = 'DELHI'
#converting all the state names to capitals
data['STATE/UT'] = pd.Series(str.upper(i) for i in data['STATE/UT'])
data['DISTRICT'] = pd.Series(str.upper(i) for i in data['DISTRICT'])
#stroring the sum of all crimes comitted within a state statewise
state_all_crimes = data.groupby('STATE/UT').sum()
#droping the sum of year column
state_all_crimes.drop('Year',axis=1,inplace=True)
#adding a column containig the total crime against women in that state
col_list= list(state_all_crimes)
state_all_crimes['Total']=state_all_crimes[col_list].sum(axis=1)
all_crimes = state_all_crimes
all_crimes
| Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | Total | |
|---|---|---|---|---|---|---|---|---|
| STATE/UT | ||||||||
| A & N ISLANDS | 336 | 212 | 20 | 600 | 112 | 288 | 0 | 1568 |
| ANDHRA PRADESH | 32150 | 34504 | 13844 | 126952 | 86964 | 280906 | 34 | 575354 |
| ARUNACHAL PRADESH | 1316 | 1470 | 6 | 1996 | 48 | 476 | 0 | 5312 |
| ASSAM | 40190 | 62074 | 3268 | 38256 | 254 | 115300 | 22 | 259364 |
| BIHAR | 30758 | 57086 | 32206 | 16958 | 694 | 69770 | 904 | 208376 |
| CHANDIGARH | 770 | 1682 | 90 | 1088 | 342 | 2080 | 0 | 6052 |
| CHHATTISGARH | 29308 | 11808 | 2758 | 47096 | 3968 | 23436 | 12 | 118386 |
| D & N HAVELI | 132 | 224 | 2 | 106 | 12 | 90 | 0 | 566 |
| DAMAN & DIU | 60 | 44 | 6 | 40 | 4 | 76 | 0 | 230 |
| DELHI | 20312 | 46586 | 3758 | 30742 | 7928 | 42834 | 2 | 152162 |
| GOA | 1062 | 640 | 38 | 1384 | 468 | 532 | 0 | 4124 |
| GUJARAT | 11644 | 34670 | 1108 | 23186 | 3230 | 146468 | 0 | 220306 |
| HARYANA | 17110 | 20016 | 7372 | 17142 | 17866 | 68414 | 4 | 147924 |
| HIMACHAL PRADESH | 4674 | 4116 | 112 | 9256 | 1220 | 7796 | 0 | 27174 |
| JAMMU & KASHMIR | 7038 | 21164 | 294 | 28638 | 9168 | 5390 | 0 | 71692 |
| JHARKHAND | 22826 | 14186 | 7896 | 9898 | 566 | 23910 | 298 | 79580 |
| KARNATAKA | 15056 | 16262 | 7016 | 65750 | 2276 | 72706 | 94 | 179160 |
| KERALA | 20030 | 4452 | 700 | 80438 | 7930 | 111626 | 0 | 225176 |
| LAKSHADWEEP | 20 | 2 | 0 | 14 | 4 | 14 | 0 | 54 |
| MADHYA PRADESH | 90996 | 35608 | 21090 | 195478 | 20964 | 102816 | 134 | 467086 |
| MAHARASHTRA | 48974 | 30368 | 9696 | 114704 | 32634 | 193202 | 6 | 429584 |
| MANIPUR | 1068 | 2606 | 6 | 1322 | 6 | 578 | 0 | 5586 |
| MEGHALAYA | 2642 | 670 | 36 | 1470 | 124 | 460 | 8 | 5410 |
| MIZORAM | 2070 | 30 | 8 | 1988 | 70 | 134 | 6 | 4306 |
| NAGALAND | 562 | 190 | 2 | 260 | 24 | 32 | 2 | 1072 |
| ODISHA | 30480 | 25588 | 10782 | 80214 | 7488 | 49206 | 36 | 203794 |
| PUDUCHERRY | 208 | 306 | 56 | 1314 | 600 | 234 | 0 | 2718 |
| PUNJAB | 14656 | 15096 | 3524 | 12410 | 1728 | 30840 | 4 | 78258 |
| RAJASTHAN | 45684 | 66278 | 11854 | 83362 | 794 | 262200 | 14 | 470186 |
| SIKKIM | 570 | 180 | 4 | 586 | 34 | 108 | 0 | 1482 |
| TAMIL NADU | 16660 | 30908 | 5060 | 43404 | 21170 | 45524 | 30 | 162756 |
| TELANGANA | 1958 | 1422 | 578 | 6376 | 2284 | 12738 | 0 | 25356 |
| TRIPURA | 5060 | 2202 | 752 | 7344 | 114 | 16086 | 0 | 31558 |
| UTTAR PRADESH | 51150 | 135906 | 57256 | 91212 | 53130 | 193738 | 6 | 582398 |
| UTTARAKHAND | 3752 | 6484 | 1974 | 4366 | 3190 | 9756 | 2 | 29524 |
| WEST BENGAL | 47876 | 61158 | 12308 | 66908 | 5348 | 344124 | 254 | 537976 |
#sorting the statewise crime from highest to lowest
state_all_crimes.sort_values('Total',ascending=False)
state_all_crimes
| Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | Total | |
|---|---|---|---|---|---|---|---|---|
| STATE/UT | ||||||||
| A & N ISLANDS | 336 | 212 | 20 | 600 | 112 | 288 | 0 | 1568 |
| ANDHRA PRADESH | 32150 | 34504 | 13844 | 126952 | 86964 | 280906 | 34 | 575354 |
| ARUNACHAL PRADESH | 1316 | 1470 | 6 | 1996 | 48 | 476 | 0 | 5312 |
| ASSAM | 40190 | 62074 | 3268 | 38256 | 254 | 115300 | 22 | 259364 |
| BIHAR | 30758 | 57086 | 32206 | 16958 | 694 | 69770 | 904 | 208376 |
| CHANDIGARH | 770 | 1682 | 90 | 1088 | 342 | 2080 | 0 | 6052 |
| CHHATTISGARH | 29308 | 11808 | 2758 | 47096 | 3968 | 23436 | 12 | 118386 |
| D & N HAVELI | 132 | 224 | 2 | 106 | 12 | 90 | 0 | 566 |
| DAMAN & DIU | 60 | 44 | 6 | 40 | 4 | 76 | 0 | 230 |
| DELHI | 20312 | 46586 | 3758 | 30742 | 7928 | 42834 | 2 | 152162 |
| GOA | 1062 | 640 | 38 | 1384 | 468 | 532 | 0 | 4124 |
| GUJARAT | 11644 | 34670 | 1108 | 23186 | 3230 | 146468 | 0 | 220306 |
| HARYANA | 17110 | 20016 | 7372 | 17142 | 17866 | 68414 | 4 | 147924 |
| HIMACHAL PRADESH | 4674 | 4116 | 112 | 9256 | 1220 | 7796 | 0 | 27174 |
| JAMMU & KASHMIR | 7038 | 21164 | 294 | 28638 | 9168 | 5390 | 0 | 71692 |
| JHARKHAND | 22826 | 14186 | 7896 | 9898 | 566 | 23910 | 298 | 79580 |
| KARNATAKA | 15056 | 16262 | 7016 | 65750 | 2276 | 72706 | 94 | 179160 |
| KERALA | 20030 | 4452 | 700 | 80438 | 7930 | 111626 | 0 | 225176 |
| LAKSHADWEEP | 20 | 2 | 0 | 14 | 4 | 14 | 0 | 54 |
| MADHYA PRADESH | 90996 | 35608 | 21090 | 195478 | 20964 | 102816 | 134 | 467086 |
| MAHARASHTRA | 48974 | 30368 | 9696 | 114704 | 32634 | 193202 | 6 | 429584 |
| MANIPUR | 1068 | 2606 | 6 | 1322 | 6 | 578 | 0 | 5586 |
| MEGHALAYA | 2642 | 670 | 36 | 1470 | 124 | 460 | 8 | 5410 |
| MIZORAM | 2070 | 30 | 8 | 1988 | 70 | 134 | 6 | 4306 |
| NAGALAND | 562 | 190 | 2 | 260 | 24 | 32 | 2 | 1072 |
| ODISHA | 30480 | 25588 | 10782 | 80214 | 7488 | 49206 | 36 | 203794 |
| PUDUCHERRY | 208 | 306 | 56 | 1314 | 600 | 234 | 0 | 2718 |
| PUNJAB | 14656 | 15096 | 3524 | 12410 | 1728 | 30840 | 4 | 78258 |
| RAJASTHAN | 45684 | 66278 | 11854 | 83362 | 794 | 262200 | 14 | 470186 |
| SIKKIM | 570 | 180 | 4 | 586 | 34 | 108 | 0 | 1482 |
| TAMIL NADU | 16660 | 30908 | 5060 | 43404 | 21170 | 45524 | 30 | 162756 |
| TELANGANA | 1958 | 1422 | 578 | 6376 | 2284 | 12738 | 0 | 25356 |
| TRIPURA | 5060 | 2202 | 752 | 7344 | 114 | 16086 | 0 | 31558 |
| UTTAR PRADESH | 51150 | 135906 | 57256 | 91212 | 53130 | 193738 | 6 | 582398 |
| UTTARAKHAND | 3752 | 6484 | 1974 | 4366 | 3190 | 9756 | 2 | 29524 |
| WEST BENGAL | 47876 | 61158 | 12308 | 66908 | 5348 | 344124 | 254 | 537976 |
state_all_crimes.info()
<class 'pandas.core.frame.DataFrame'> Index: 36 entries, A & N ISLANDS to WEST BENGAL Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Rape 36 non-null int64 1 Kidnapping and Abduction 36 non-null int64 2 Dowry Deaths 36 non-null int64 3 Assault on women with intent to outrage her modesty 36 non-null int64 4 Insult to modesty of Women 36 non-null int64 5 Cruelty by Husband or his Relatives 36 non-null int64 6 Importation of Girls 36 non-null int64 7 Total 36 non-null int64 dtypes: int64(8) memory usage: 2.5+ KB
state_all_crimes=state_all_crimes.reset_index()
##CRIME STATUS
#all crimes count
total_df=state_all_crimes.sum(axis=0).reset_index()
tf=pd.DataFrame(total_df)
tf
| index | 0 | |
|---|---|---|
| 0 | STATE/UT | A & N ISLANDSANDHRA PRADESHARUNACHAL PRADESHAS... |
| 1 | Rape | 619158 |
| 2 | Kidnapping and Abduction | 746198 |
| 3 | Dowry Deaths | 215480 |
| 4 | Assault on women with intent to outrage her mo... | 1212258 |
| 5 | Insult to modesty of Women | 292756 |
| 6 | Cruelty by Husband or his Relatives | 2233888 |
| 7 | Importation of Girls | 1872 |
| 8 | Total | 5321610 |
#for all crimes count
#tf = drop
tf=tf.drop([0])
tf=tf.drop([8])
import plotly.express as px ##provides functions to visualize a variety of types of data
#plotting
#state v/s total crimes
sorted_df = state_all_crimes.sort_values('Total',ascending=False)
fig = px.bar( x=tf["index"],y=tf[0], color=tf[0],
labels={'x': "Crimes", 'y': "Count"}, title="Total Cases",
color_continuous_scale='burg')
fig.show()
#state v/s total crimes
sorted_df = state_all_crimes.sort_values('Total',ascending=False)
fig = px.bar( x=sorted_df['STATE/UT'],y=sorted_df["Total"], color=sorted_df["Total"],
labels={'x': "States", 'y': "Count"}, title="Total Cases",
color_continuous_scale='burg')
fig.show()
#states v/s rapes
fig = px.bar( x=state_all_crimes['STATE/UT'],y=state_all_crimes["Rape"], color=state_all_crimes["Rape"],
labels={'x': "States", 'y': "Count"}, title="Rape Cases",
color_continuous_scale='burg')
fig.show()
#states v/s kidnapping and abduction
fig = px.bar( x=state_all_crimes['STATE/UT'],y=state_all_crimes["Kidnapping and Abduction"], color=state_all_crimes["Kidnapping and Abduction"],
labels={'x': "States", 'y': "Count"}, title="Kidnapping and Abduction Cases",
color_continuous_scale='burg')
fig.show()
#states v/s Dowry Deaths
fig = px.bar( x=state_all_crimes['STATE/UT'],y=state_all_crimes["Dowry Deaths"], color=state_all_crimes["Dowry Deaths"],
labels={'x': "States", 'y': "Count"}, title="Dowry Deaths",
color_continuous_scale='burg')
fig.show()
#states v/s Assault on women with intent to outrage her modesty
fig = px.bar( x=state_all_crimes['STATE/UT'],y=state_all_crimes["Assault on women with intent to outrage her modesty"], color=state_all_crimes["Assault on women with intent to outrage her modesty"],
labels={'x': "States", 'y': "Count"}, title="Assault on women with intent to outrage her modesty",
color_continuous_scale='burg')
fig.show()
#states v/s Insult to modesty of Women
fig = px.bar( x=state_all_crimes['STATE/UT'],y=state_all_crimes["Insult to modesty of Women"], color=state_all_crimes["Insult to modesty of Women"],
labels={'x': "States", 'y': "Count"}, title="Insult to modesty of Women",
color_continuous_scale='burg')
fig.show()
#states v/s Cruelty by Husband or his Relatives
fig = px.bar( x=state_all_crimes['STATE/UT'],y=state_all_crimes["Cruelty by Husband or his Relatives"], color=state_all_crimes["Cruelty by Husband or his Relatives"],
labels={'x': "States", 'y': "Count"}, title="Cruelty by Husband or his Relatives",
color_continuous_scale='burg')
fig.show()
#states v/s Importation of Girls
importation_df = state_all_crimes.copy()
importation_df.loc[importation_df['Importation of Girls'] <= 50, 'STATE/UT'] = 'Others'
# Represent only large countries
fig = px.pie(importation_df, values='Importation of Girls', names='STATE/UT', title="Importation of Girls", color_discrete_sequence=px.colors.sequential.Teal_r)
fig.update_traces(textposition='inside', textinfo='label+value', marker=dict(line=dict(color='#000000', width=2)))
#fig.update_layout(annotations=[dict(text='count', x=0.5, y=0.5, font_size=20, showarrow=False)])
fig.show()
plt.figure(figsize=(15,7))
data.groupby("Year")["Rape"].sum().plot()
data.groupby("Year")["Kidnapping and Abduction"].sum().plot()
data.groupby("Year")["Dowry Deaths"].sum().plot(label="Dowry Deaths")
data.groupby("Year")["Assault on women with intent to outrage her modesty"].sum().plot()
data.groupby("Year")["Insult to modesty of Women"].sum().plot()
data.groupby("Year")["Cruelty by Husband or his Relatives"].sum().plot()
data.groupby("Year")["Importation of Girls"].sum().plot()
plt.legend()
plt.tight_layout()
all_crimes = all_crimes.reset_index()
all_crimes
| STATE/UT | Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | Total | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | A & N ISLANDS | 336 | 212 | 20 | 600 | 112 | 288 | 0 | 1568 |
| 1 | ANDHRA PRADESH | 32150 | 34504 | 13844 | 126952 | 86964 | 280906 | 34 | 575354 |
| 2 | ARUNACHAL PRADESH | 1316 | 1470 | 6 | 1996 | 48 | 476 | 0 | 5312 |
| 3 | ASSAM | 40190 | 62074 | 3268 | 38256 | 254 | 115300 | 22 | 259364 |
| 4 | BIHAR | 30758 | 57086 | 32206 | 16958 | 694 | 69770 | 904 | 208376 |
| 5 | CHANDIGARH | 770 | 1682 | 90 | 1088 | 342 | 2080 | 0 | 6052 |
| 6 | CHHATTISGARH | 29308 | 11808 | 2758 | 47096 | 3968 | 23436 | 12 | 118386 |
| 7 | D & N HAVELI | 132 | 224 | 2 | 106 | 12 | 90 | 0 | 566 |
| 8 | DAMAN & DIU | 60 | 44 | 6 | 40 | 4 | 76 | 0 | 230 |
| 9 | DELHI | 20312 | 46586 | 3758 | 30742 | 7928 | 42834 | 2 | 152162 |
| 10 | GOA | 1062 | 640 | 38 | 1384 | 468 | 532 | 0 | 4124 |
| 11 | GUJARAT | 11644 | 34670 | 1108 | 23186 | 3230 | 146468 | 0 | 220306 |
| 12 | HARYANA | 17110 | 20016 | 7372 | 17142 | 17866 | 68414 | 4 | 147924 |
| 13 | HIMACHAL PRADESH | 4674 | 4116 | 112 | 9256 | 1220 | 7796 | 0 | 27174 |
| 14 | JAMMU & KASHMIR | 7038 | 21164 | 294 | 28638 | 9168 | 5390 | 0 | 71692 |
| 15 | JHARKHAND | 22826 | 14186 | 7896 | 9898 | 566 | 23910 | 298 | 79580 |
| 16 | KARNATAKA | 15056 | 16262 | 7016 | 65750 | 2276 | 72706 | 94 | 179160 |
| 17 | KERALA | 20030 | 4452 | 700 | 80438 | 7930 | 111626 | 0 | 225176 |
| 18 | LAKSHADWEEP | 20 | 2 | 0 | 14 | 4 | 14 | 0 | 54 |
| 19 | MADHYA PRADESH | 90996 | 35608 | 21090 | 195478 | 20964 | 102816 | 134 | 467086 |
| 20 | MAHARASHTRA | 48974 | 30368 | 9696 | 114704 | 32634 | 193202 | 6 | 429584 |
| 21 | MANIPUR | 1068 | 2606 | 6 | 1322 | 6 | 578 | 0 | 5586 |
| 22 | MEGHALAYA | 2642 | 670 | 36 | 1470 | 124 | 460 | 8 | 5410 |
| 23 | MIZORAM | 2070 | 30 | 8 | 1988 | 70 | 134 | 6 | 4306 |
| 24 | NAGALAND | 562 | 190 | 2 | 260 | 24 | 32 | 2 | 1072 |
| 25 | ODISHA | 30480 | 25588 | 10782 | 80214 | 7488 | 49206 | 36 | 203794 |
| 26 | PUDUCHERRY | 208 | 306 | 56 | 1314 | 600 | 234 | 0 | 2718 |
| 27 | PUNJAB | 14656 | 15096 | 3524 | 12410 | 1728 | 30840 | 4 | 78258 |
| 28 | RAJASTHAN | 45684 | 66278 | 11854 | 83362 | 794 | 262200 | 14 | 470186 |
| 29 | SIKKIM | 570 | 180 | 4 | 586 | 34 | 108 | 0 | 1482 |
| 30 | TAMIL NADU | 16660 | 30908 | 5060 | 43404 | 21170 | 45524 | 30 | 162756 |
| 31 | TELANGANA | 1958 | 1422 | 578 | 6376 | 2284 | 12738 | 0 | 25356 |
| 32 | TRIPURA | 5060 | 2202 | 752 | 7344 | 114 | 16086 | 0 | 31558 |
| 33 | UTTAR PRADESH | 51150 | 135906 | 57256 | 91212 | 53130 | 193738 | 6 | 582398 |
| 34 | UTTARAKHAND | 3752 | 6484 | 1974 | 4366 | 3190 | 9756 | 2 | 29524 |
| 35 | WEST BENGAL | 47876 | 61158 | 12308 | 66908 | 5348 | 344124 | 254 | 537976 |
all_crimes.shape
(36, 9)
#finding the mean number of crimes
m = all_crimes['Total'].mean()
print('mean=',m)
#finding the quantiles
q = np.quantile(all_crimes['Total'],[0.25,0.75])
print(q)
l=q[0]
u=q[1]
mean= 147822.5 [ 5060.5 211358.5]
#copying the state_all_crimes to a new dataframe to normalise values and predict
df_kmeans = all_crimes.loc[:,all_crimes.columns!="STATE/UT"]
#adding an additional column called output
output=[]
for i in df_kmeans['Total']:
if i >= m:
output.append(1)#redzone
elif m > i:
output.append(0)#safe
all_crimes['output']=output
df_kmeans_y=all_crimes['output']
from sklearn.preprocessing import MinMaxScaler
cols = df_kmeans.columns
cols
Index(['Rape', 'Kidnapping and Abduction', 'Dowry Deaths',
'Assault on women with intent to outrage her modesty',
'Insult to modesty of Women', 'Cruelty by Husband or his Relatives',
'Importation of Girls', 'Total'],
dtype='object')
ms = MinMaxScaler()
df_kmeans = ms.fit_transform(df_kmeans)
df_kmeans = pd.DataFrame(df_kmeans,columns=[cols])
df_kmeans.head()
| Rape | Kidnapping and Abduction | Dowry Deaths | Assault on women with intent to outrage her modesty | Insult to modesty of Women | Cruelty by Husband or his Relatives | Importation of Girls | Total | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0.003473 | 0.001545 | 0.000349 | 0.002998 | 0.001242 | 0.000796 | 0.000000 | 0.002600 |
| 1 | 0.353170 | 0.253870 | 0.241791 | 0.649419 | 1.000000 | 0.816285 | 0.037611 | 0.987904 |
| 2 | 0.014246 | 0.010802 | 0.000105 | 0.010140 | 0.000506 | 0.001343 | 0.000000 | 0.009029 |
| 3 | 0.441545 | 0.456734 | 0.057077 | 0.195647 | 0.002875 | 0.335027 | 0.024336 | 0.445287 |
| 4 | 0.337869 | 0.420032 | 0.562491 | 0.086686 | 0.007935 | 0.202714 | 1.000000 | 0.357730 |
K-means clustering to classify the states into safe or unsafe.
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=2, random_state=0)
kmeans.fit(df_kmeans)
KMeans(n_clusters=2, random_state=0)
kmeans.inertia_
6.49248832628424
#checking the accuracy
labels = kmeans.labels_
# check how many of the samples were correctly labeled
correct_labels = sum(df_kmeans_y == labels)
print('labels:',labels)
print('df_kmeans output:',df_kmeans_y)
print("Result: %d out of %d samples were correctly labeled." % (correct_labels, df_kmeans_y.size))
labels: [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1] df_kmeans output: 0 0 1 1 2 0 3 1 4 1 5 0 6 0 7 0 8 0 9 1 10 0 11 1 12 1 13 0 14 0 15 0 16 1 17 1 18 0 19 1 20 1 21 0 22 0 23 0 24 0 25 1 26 0 27 0 28 1 29 0 30 1 31 0 32 0 33 1 34 0 35 1 Name: output, dtype: int64 Result: 27 out of 36 samples were correctly labeled.
#based on the prediction of the k means algorithm classifying the states
#as safe or unsafe for women
final=[]
for i in range(len(labels)):
state=all_crimes['STATE/UT'][i]
label = labels[i]
if label == 1:
final.append([state,'unsafe'])
else:
final.append([state,'safe'])
final_df = pd.DataFrame(final, columns=['STATES/UT', 'SAFE/UNSAFE'])
final_df
| STATES/UT | SAFE/UNSAFE | |
|---|---|---|
| 0 | A & N ISLANDS | safe |
| 1 | ANDHRA PRADESH | unsafe |
| 2 | ARUNACHAL PRADESH | safe |
| 3 | ASSAM | safe |
| 4 | BIHAR | safe |
| 5 | CHANDIGARH | safe |
| 6 | CHHATTISGARH | safe |
| 7 | D & N HAVELI | safe |
| 8 | DAMAN & DIU | safe |
| 9 | DELHI | safe |
| 10 | GOA | safe |
| 11 | GUJARAT | safe |
| 12 | HARYANA | safe |
| 13 | HIMACHAL PRADESH | safe |
| 14 | JAMMU & KASHMIR | safe |
| 15 | JHARKHAND | safe |
| 16 | KARNATAKA | safe |
| 17 | KERALA | safe |
| 18 | LAKSHADWEEP | safe |
| 19 | MADHYA PRADESH | unsafe |
| 20 | MAHARASHTRA | unsafe |
| 21 | MANIPUR | safe |
| 22 | MEGHALAYA | safe |
| 23 | MIZORAM | safe |
| 24 | NAGALAND | safe |
| 25 | ODISHA | safe |
| 26 | PUDUCHERRY | safe |
| 27 | PUNJAB | safe |
| 28 | RAJASTHAN | unsafe |
| 29 | SIKKIM | safe |
| 30 | TAMIL NADU | safe |
| 31 | TELANGANA | safe |
| 32 | TRIPURA | safe |
| 33 | UTTAR PRADESH | unsafe |
| 34 | UTTARAKHAND | safe |
| 35 | WEST BENGAL | unsafe |
Based on the above classification the laws in the those states can be made more stringent